Skip to content

[SystemZ] Handle f16 load positive/negative/complement without libcalls. #136286

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

JonPsson1
Copy link
Contributor

This can be done directly with the (64-bit) target instruction as only the sign bit is changed.

@llvmbot
Copy link
Member

llvmbot commented Apr 18, 2025

@llvm/pr-subscribers-backend-systemz

Author: Jonas Paulsson (JonPsson1)

Changes

This can be done directly with the (64-bit) target instruction as only the sign bit is changed.


Full diff: https://github.com/llvm/llvm-project/pull/136286.diff

10 Files Affected:

  • (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.cpp (+2)
  • (modified) llvm/lib/Target/SystemZ/SystemZInstrFP.td (+6-2)
  • (modified) llvm/test/CodeGen/SystemZ/fp-abs-01.ll (+2-3)
  • (modified) llvm/test/CodeGen/SystemZ/fp-abs-02.ll (+12)
  • (modified) llvm/test/CodeGen/SystemZ/fp-abs-03.ll (+2-3)
  • (modified) llvm/test/CodeGen/SystemZ/fp-abs-04.ll (+3-7)
  • (modified) llvm/test/CodeGen/SystemZ/fp-mul-08.ll (+3-4)
  • (modified) llvm/test/CodeGen/SystemZ/fp-mul-10.ll (+10-11)
  • (modified) llvm/test/CodeGen/SystemZ/fp-neg-01.ll (+3-4)
  • (modified) llvm/test/CodeGen/SystemZ/fp-neg-02.ll (+3-4)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 75cd5a319557d..746e2b1a88a17 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -554,6 +554,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
     setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
     setOperationAction(ISD::FCOPYSIGN, MVT::f16, Legal);
+    for (auto Op : {ISD::FNEG, ISD::FABS})
+      setOperationAction(Op, MVT::f16, Legal);
   }
 
   for (unsigned I = MVT::FIRST_FP_VALUETYPE;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 7775f456bbdc1..12e99f7e5f47a 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -378,8 +378,10 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
 }
 // Generic form, which does not set CC.
 def LPDFR : UnaryRRE<"lpdfr", 0xB370, fabs, FP64,  FP64>;
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1 in {
+  def LPDFR_16 : UnaryRRE<"lpdfr", 0xB370, fabs, FP16,  FP16>;
   def LPDFR_32 : UnaryRRE<"lpdfr", 0xB370, fabs, FP32,  FP32>;
+}
 
 // Negative absolute value (Load Negative).
 let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
@@ -389,8 +391,10 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
 }
 // Generic form, which does not set CC.
 def LNDFR : UnaryRRE<"lndfr", 0xB371, fnabs, FP64,  FP64>;
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1 in {
+  def LNDFR_16 : UnaryRRE<"lndfr", 0xB371, fnabs, FP16,  FP16>;
   def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32,  FP32>;
+}
 
 // Square root.
 let Uses = [FPC], mayRaiseFPException = 1 in {
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
index 0cfdefe3bd61b..fe573f1e3587e 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -7,10 +7,9 @@
 declare half @llvm.fabs.f16(half %f)
 define half @f0(half %f) {
 ; CHECK-LABEL: f0:
-; CHECK:      brasl %r14, __extendhfsf2@PLT
+; CHECK:      # %bb.0:
 ; CHECK-NEXT: lpdfr %f0, %f0
-; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NEXT: br %r14
   %res = call half @llvm.fabs.f16(half %f)
   ret half %res
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-02.ll b/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
index 4266a893e8a3b..752609ef6d00d 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -3,6 +3,18 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
+; Test f16.
+declare half @llvm.fabs.f16(half %f)
+define half @f0(half %f) {
+; CHECK-LABEL: f0:
+; CHECK:      # %bb.0:
+; CHECK-NEXT: lndfr %f0, %f0
+; CHECK-NEXT: br %r14
+  %abs = call half @llvm.fabs.f16(half %f)
+  %res = fneg half %abs
+  ret half %res
+}
+
 ; Test f32.
 declare float @llvm.fabs.f32(float %f)
 define float @f1(float %f) {
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll
index 29f2d06e75ff9..029ae2309cab9 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll
@@ -6,10 +6,9 @@
 declare half @llvm.fabs.f16(half %f)
 define half @f0(half %f) {
 ; CHECK-LABEL: f0:
-; CHECK:      brasl %r14, __extendhfsf2@PLT
+; CHECK:      # %bb.0:
 ; CHECK-NEXT: lpdfr %f0, %f0
-; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NEXT: br %r14
   %res = call half @llvm.fabs.f16(half %f)
   ret half %res
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-04.ll b/llvm/test/CodeGen/SystemZ/fp-abs-04.ll
index afaf3f6d22ac8..fbb43b69371f3 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-04.ll
@@ -6,13 +6,9 @@
 declare half @llvm.fabs.f16(half %f)
 define half @f0(half %f) {
 ; CHECK-LABEL: f0:
-; CHECK:      brasl   %r14, __extendhfsf2@PLT
-; CHECK-NEXT: lpdfr   %f0, %f0
-; CHECK-NEXT: brasl   %r14, __truncsfhf2@PLT
-; CHECK-NEXT: brasl   %r14, __extendhfsf2@PLT
-; CHECK-NEXT: lcdfr   %f0, %f0
-; CHECK-NEXT: brasl   %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK:      # %bb.0:
+; CHECK-NEXT: lndfr %f0, %f0
+; CHECK-NEXT: br %r14
   %abs = call half @llvm.fabs.f16(half %f)
   %res = fneg half %abs
   ret half %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-08.ll b/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
index 2b18abec8d555..e739bddd4f18f 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
@@ -8,13 +8,12 @@ declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
 
 define half @f0(half %f1, half %f2, half %acc) {
 ; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK-NOT: brasl
+; CHECK: lcdfr %f{{[0-9]+}}, %f4
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK-SCALAR: maebr %f0, %f9, %f8
+; CHECK-SCALAR: maebr %f0, %f8, %f10
 ; CHECK-VECTOR: wfmasb %f0, %f0, %f8, %f10
 ; CHECK: brasl %r14, __truncsfhf2@PLT
 ; CHECK: br %r14
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-10.ll b/llvm/test/CodeGen/SystemZ/fp-mul-10.ll
index 1ecf52fbde354..8f2cd23112cd0 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-10.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-10.ll
@@ -30,10 +30,10 @@ define half @f3_half(half %f1, half %f2, half %acc) {
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: wfmasb %f0, %f0, %f8, %f10
 ; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NOT: brasl
+; CHECK:      lcdfr %f0, %f0
+; CHECK-NEXT: lmg
+; CHECK-NEXT: br %r14
   %res = call half @llvm.fma.f16 (half %f1, half %f2, half %acc)
   %negres = fneg half %res
   ret half %negres
@@ -50,18 +50,17 @@ define float @f3(float %f1, float %f2, float %acc) {
 
 define half @f4_half(half %f1, half %f2, half %acc) {
 ; CHECK-LABEL: f4_half:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK-NOT: brasl
+; CHECK: lcdfr %f0, %f4
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: wfmasb %f0, %f0, %f8, %f10
 ; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NOT: brasl
+; CHECK:      lcdfr %f0, %f0
+; CHECK-NEXT: lmg
+; CHECK-NEXT: br %r14
   %negacc = fneg half %acc
   %res = call half @llvm.fma.f16 (half %f1, half %f2, half %negacc)
   %negres = fneg half %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
index a8fe8d5da7c8a..0e19d9647178f 100644
--- a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -6,10 +6,9 @@
 ; Test f16.
 define half @f0(half %f) {
 ; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK:      # %bb.0:
+; CHECK-NEXT: lcdfr %f0, %f0
+; CHECK-NEXT: br %r14
   %res = fneg half %f
   ret half %res
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-02.ll b/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
index 848c4740d8540..d0802878f8f8b 100644
--- a/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
@@ -5,10 +5,9 @@
 ; Test f16.
 define half @f0(half %f) {
 ; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK:      # %bb.0:
+; CHECK-NEXT: lcdfr %f0, %f0
+; CHECK-NEXT: br %r14
   %res = fneg half %f
   ret half %res
 }

@tgross35
Copy link
Contributor

Was the ABI for f16 published anywhere public by the way? I'm trying to understand how l*dfr could work for both f32 and f16, I'm assuming they are passed in the same registers and aligned at the MSB.

@uweigand
Copy link
Member

Was the ABI for f16 published anywhere public by the way? I'm trying to understand how l*dfr could work for both f32 and f16, I'm assuming they are passed in the same registers and aligned at the MSB.

It's not yet published (we're planning on doing this soon), but you're indeed correct that they are passed in the same registers as f32 and f64, and aligned at the MSB. (That's why the same set of instructions already works for both f32 and f64.)

Copy link
Member

@uweigand uweigand left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks!

@@ -554,6 +554,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Legal);
for (auto Op : {ISD::FNEG, ISD::FABS})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor nit: maybe add FCOPYSIGN to the loop then?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants